The goal of this script is to generate a Seurat object for sample 2022_14.
LogNormalize, then doublets
detection using scran hybrid and scDblFinder
method, and doublet cells removalLogNormalize, for only the remaining
cellsPCAtSNE and UMAPlibrary(dplyr)
library(patchwork)
library(ggplot2)
.libPaths()
## [1] "/usr/local/lib/R/library"
In this section, we set the global settings of the analysis. We will store data there :
out_dir = "."
We load the parameters :
sample_name = params$sample_name # "2021_31"
# sample_name = "2021_31"
Input count matrix is there :
count_matrix_dir = paste0(out_dir, "/input/", sample_name)
We load the markers and specific colors for each cell type :
cell_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_cell_markers.rds"))
cell_markers = lapply(cell_markers, FUN = toupper)
lengths(cell_markers)
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 13 13 9 10
## B cells cuticle cortex medulla
## 16 15 16 10
## IRS proliferative IBL ORS
## 16 20 15 16
## IFE HFSC melanocytes sebocytes
## 17 17 10 8
Here are custom colors for each cell type :
color_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_color_markers.rds"))
data.frame(cell_type = names(color_markers),
color = unlist(color_markers)) %>%
ggplot2::ggplot(., aes(x = cell_type, y = 0, fill = cell_type)) +
ggplot2::geom_point(pch = 21, size = 5) +
ggplot2::scale_fill_manual(values = unlist(color_markers), breaks = names(color_markers)) +
ggplot2::theme_classic() +
ggplot2::theme(legend.position = "none",
axis.line = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.text.y = element_blank())
We load markers to display on the dotplot :
dotplot_markers = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_dotplot_markers.rds"))
dotplot_markers = lapply(dotplot_markers, FUN = toupper)
dotplot_markers
## $`CD4 T cells`
## [1] "CD3E" "CD4"
##
## $`CD8 T cells`
## [1] "CD3E" "CD8A"
##
## $`Langerhans cells`
## [1] "CD207" "CPVL"
##
## $macrophages
## [1] "TREM2" "MSR1"
##
## $`B cells`
## [1] "CD79A" "CD79B"
##
## $cuticle
## [1] "KRT32" "KRT35"
##
## $cortex
## [1] "KRT31" "PRR9"
##
## $medulla
## [1] "BAMBI" "ADLH1A3"
##
## $IRS
## [1] "KRT71" "KRT73"
##
## $proliferative
## [1] "TOP2A" "MCM5"
##
## $IBL
## [1] "KRT16" "KRT6C"
##
## $ORS
## [1] "KRT15" "GPX2"
##
## $IFE
## [1] "SPINK5" "LY6D"
##
## $HFSC
## [1] "DIO2" "TCEAL2"
##
## $melanocytes
## [1] "DCT" "MLANA"
##
## $sebocytes
## [1] "CLMP" "PPARG"
We load metadata for this sample :
sample_info = readRDS(paste0(out_dir, "/../1_metadata/hs_hd_sample_info.rds"))
sample_info %>%
dplyr::filter(project_name == sample_name)
## project_name sample_type sample_identifier gender color
## 1 2022_14 HS HS_5 F #8B3E2F
These is a parameter for different functions :
cl = aquarius::create_parallel_instance(nthreads = 3L)
cut_log_nCount_RNA = 6
cut_nFeature_RNA = 500
cut_percent.mt = 20
cut_percent.rb = 50
In this section, we load the raw count matrix. Then, we applied an empty droplets filtering.
sobj = aquarius::load_sc_data(data_path = count_matrix_dir,
sample_name = sample_name,
my_seed = 1337L)
## [1] 27955 6794880
## [1] 86911888
## [1] 27955 3953
## [1] 81304370
## [1] 0.9354804
sobj
## An object of class Seurat
## 27955 features across 3953 samples within 1 assay
## Active assay: RNA (27955 features, 0 variable features)
(Time to run : 160.08 s)
In genes metadata, we add the Ensembl ID. The
sobj@assays$RNA@meta.features dataframe contains three
information :
rownames : gene names stored as the dimnames of the
count matrix. Duplicated gene names will have a .1 at the
end of their nameEnsembl_ID : EnsemblID, as stored in the
features.tsv.gz filegene_name : gene_name, as stored in the
features.tsv.gz file. Duplicated gene names will have the
same name.features_df = read.csv(paste0(count_matrix_dir, "/features.tsv.gz"), sep = "\t", header = 0)
features_df = features_df[, c(1:2)]
colnames(features_df) = c("Ensembl_ID", "gene_name")
rownames(features_df) = rownames(sobj) # mandatory for Seurat::FindVariableFeatures
sobj@assays$RNA@meta.features = features_df
rm(features_df)
head(sobj@assays$RNA@meta.features)
## Ensembl_ID gene_name
## MIR1302-2HG ENSG00000243485 MIR1302-2HG
## FAM138A ENSG00000237613 FAM138A
## OR4F5 ENSG00000186092 OR4F5
## AL627309.1 ENSG00000238009 AL627309.1
## AL627309.3 ENSG00000239945 AL627309.3
## AL627309.4 ENSG00000241599 AL627309.4
We add the same columns as in metadata :
row_oi = (sample_info$project_name == sample_name)
sobj$project_name = sample_name
sobj$sample_identifier = sample_info[row_oi, "sample_identifier"]
sobj$sample_type = sample_info[row_oi, "sample_type"]
colnames(sobj@meta.data)
## [1] "orig.ident" "nCount_RNA" "nFeature_RNA"
## [4] "log_nCount_RNA" "project_name" "sample_identifier"
## [7] "sample_type"
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 27955 features across 3953 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
We generate a tSNE to visualize cells before filtering.
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj
## An object of class Seurat
## 27955 features across 3953 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We annotate cells for cell type using
Seurat::AddModuleScore function.
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 59 28 44 167
## B cells cuticle cortex medulla
## 12 454 132 78
## IRS proliferative IBL ORS
## 135 390 656 343
## IFE HFSC melanocytes sebocytes
## 414 455 512 74
(Time to run : 43.84 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", "MSX2", "KRT16",
unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase using Seurat and
cyclone.
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 2208 454 1290
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 1680 192 902
## G2M 232 224 90
## S 296 38 298
(Time to run : 375.46 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
In this section, we look at the number of genes expressed by each cell, the number of UMI, the percentage of mitochondrial genes expressed, and the percentage of ribosomal genes expressed. Then, without taking into account the cells expressing low number of genes or have low number of UMI, we identify doublet cells.
We compute four quality metrics :
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^MT", col.name = "percent.mt")
sobj = Seurat::PercentageFeatureSet(sobj, pattern = "^RP[L|S][0-9]*$", col.name = "percent.rb")
head(sobj@meta.data)
## orig.ident nCount_RNA nFeature_RNA log_nCount_RNA
## AAACCCAAGTCAGGGT-1 2022_14 37323 5732 10.527365
## AAACCCACAACCCTAA-1 2022_14 133 104 4.890349
## AAACCCACATCGATCA-1 2022_14 44949 6163 10.713284
## AAACCCACATCTTCGC-1 2022_14 133 116 4.890349
## AAACCCAGTATCGCGC-1 2022_14 121 111 4.795791
## AAACCCAGTGAGTGAC-1 2022_14 13927 3386 9.541585
## project_name sample_identifier sample_type score_CD4_T_cells
## AAACCCAAGTCAGGGT-1 2022_14 HS_5 HS -0.067519478
## AAACCCACAACCCTAA-1 2022_14 HS_5 HS -0.011353291
## AAACCCACATCGATCA-1 2022_14 HS_5 HS -0.086985786
## AAACCCACATCTTCGC-1 2022_14 HS_5 HS -0.011028343
## AAACCCAGTATCGCGC-1 2022_14 HS_5 HS -0.007133888
## AAACCCAGTGAGTGAC-1 2022_14 HS_5 HS -0.073180622
## score_CD8_T_cells score_Langerhans_cells score_macrophages
## AAACCCAAGTCAGGGT-1 0.000194117 -0.045754796 -0.106069443
## AAACCCACAACCCTAA-1 -0.011316815 -0.007107506 -0.009180516
## AAACCCACATCGATCA-1 -0.036376814 -0.083526676 -0.111252003
## AAACCCACATCTTCGC-1 -0.006960970 -0.005679166 -0.013770774
## AAACCCAGTATCGCGC-1 -0.007662896 -0.011603086 -0.009378342
## AAACCCAGTGAGTGAC-1 -0.027138623 -0.071717894 -0.084498031
## score_B_cells score_cuticle score_cortex score_medulla
## AAACCCAAGTCAGGGT-1 -0.004308593 -0.34535241 -0.25227717 0.31076919
## AAACCCACAACCCTAA-1 0.000000000 -0.07227883 -0.01976912 -0.02343547
## AAACCCACATCGATCA-1 -0.004582676 -0.44463572 -0.18098495 -0.17437442
## AAACCCACATCTTCGC-1 0.000000000 0.77541202 0.26420719 -0.01785048
## AAACCCAGTATCGCGC-1 -0.006247745 0.20476684 -0.02835530 0.41459736
## AAACCCAGTGAGTGAC-1 -0.006127870 -0.29709161 -0.13326200 -0.04665638
## score_IRS score_proliferative score_IBL score_ORS
## AAACCCAAGTCAGGGT-1 1.00179362 -0.156481169 -0.4967240 -0.54747567
## AAACCCACAACCCTAA-1 -0.03160249 -0.014708770 0.1646206 -0.07133058
## AAACCCACATCGATCA-1 -0.01122108 -0.174088047 -0.2804257 0.01784590
## AAACCCACATCTTCGC-1 -0.02355474 -0.014708770 -0.1064054 -0.07054564
## AAACCCAGTATCGCGC-1 0.22124139 -0.009015432 -0.1171467 -0.06557892
## AAACCCAGTGAGTGAC-1 -0.05177322 -0.148989101 -0.4988876 0.49625084
## score_IFE score_HFSC score_melanocytes score_sebocytes
## AAACCCAAGTCAGGGT-1 -0.15812240 -0.40316392 -0.46839854 -0.11532130
## AAACCCACAACCCTAA-1 -0.05997666 0.48772271 -0.04552078 -0.05716701
## AAACCCACATCGATCA-1 1.31403181 -0.28696556 -0.47011190 -0.06104338
## AAACCCACATCTTCGC-1 -0.09054941 -0.06606615 -0.06528211 -0.04769522
## AAACCCAGTATCGCGC-1 -0.07973852 -0.07086559 0.81980729 -0.05173921
## AAACCCAGTGAGTGAC-1 -0.40217356 1.18879779 -0.27004576 -0.14304954
## cell_type Seurat.Phase cyclone.Phase percent.mt percent.rb
## AAACCCAAGTCAGGGT-1 IRS G1 G1 5.795354 23.703882
## AAACCCACAACCCTAA-1 HFSC G2M S 6.015038 15.037594
## AAACCCACATCGATCA-1 IFE G1 S 7.027965 22.825869
## AAACCCACATCTTCGC-1 cuticle G1 G2M 4.511278 21.804511
## AAACCCAGTATCGCGC-1 melanocytes G2M G2M 2.479339 7.438017
## AAACCCAGTGAGTGAC-1 HFSC G1 S 5.485747 26.703526
We get the cell barcodes for the failing cells :
fail_percent.mt = sobj@meta.data %>% dplyr::filter(percent.mt > cut_percent.mt) %>% rownames()
fail_percent.rb = sobj@meta.data %>% dplyr::filter(percent.rb > cut_percent.rb) %>% rownames()
fail_log_nCount_RNA = sobj@meta.data %>% dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA) %>% rownames()
fail_nFeature_RNA = sobj@meta.data %>% dplyr::filter(nFeature_RNA < cut_nFeature_RNA) %>% rownames()
Without taking into account the low UMI and low number of features cells, we identify doublets.
fsobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA)))
fsobj
## An object of class Seurat
## 27955 features across 3272 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
On this filtered dataset, we apply doublet cells detection. Just before, we run the normalization, taking into account only the remaining cells.
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 27955 features across 3953 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We identify doublet cells :
fsobj = aquarius::find_doublets(sobj = fsobj,
BPPARAM = cl)
## [1] 27955 3272
##
## FALSE TRUE
## 2995 277
## [18:07:36] WARNING: amalgamation/../src/learner.cc:1095: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.
##
## FALSE TRUE
## 3006 266
##
## FALSE TRUE
## 2854 418
fail_doublets_consensus = Seurat::WhichCells(fsobj, expression = doublets_consensus.class)
fail_doublets_scDblFinder = Seurat::WhichCells(fsobj, expression = scDblFinder.class)
fail_doublets_hybrid = Seurat::WhichCells(fsobj, expression = hybrid_score.class)
(Time to run : 104.29 s)
We add the information in the non filtered Seurat object :
sobj$doublets_consensus.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_consensus ~ TRUE,
!(colnames(sobj) %in% fail_doublets_consensus) ~ FALSE)
sobj$scDblFinder.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_scDblFinder ~ TRUE,
!(colnames(sobj) %in% fail_doublets_scDblFinder) ~ FALSE)
sobj$hybrid_score.class = dplyr::case_when(!(colnames(sobj) %in% colnames(fsobj)) ~ NA,
colnames(sobj) %in% fail_doublets_hybrid ~ TRUE,
!(colnames(sobj) %in% fail_doublets_hybrid) ~ FALSE)
We can visualize the 4 cells quality with a Venn diagram :
n_filtered = c(fail_percent.mt, fail_percent.rb, fail_log_nCount_RNA, fail_nFeature_RNA) %>%
unique() %>% length()
percent_filtered = round(100*(n_filtered/ncol(sobj)), 2)
ggvenn::ggvenn(list(percent.mt = fail_percent.mt,
percent.rb = fail_percent.rb,
log_nCount_RNA = fail_log_nCount_RNA,
nFeature_RNA = fail_nFeature_RNA),
fill_color = c("#0073C2FF", "#EFC000FF", "orange", "pink"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::labs(title = "Filtered out cells",
subtitle = paste0(n_filtered, " cells (", percent_filtered, " % of all cells)")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of UMI, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "log_nCount_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_log_nCount_RNA)
Seurat::VlnPlot(sobj, features = "log_nCount_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_log_nCount_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_log_nCount_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "log_nCount_RNA",
subtitle = paste0(length(fail_log_nCount_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To visualize the threshold for number of features, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "nFeature_RNA",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_nFeature_RNA)
Seurat::VlnPlot(sobj, features = "nFeature_RNA", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_nFeature_RNA, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_nFeature_RNA,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "nFeature_RNA",
subtitle = paste0(length(fail_nFeature_RNA), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for mitochondrial gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.mt",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.mt)
Seurat::VlnPlot(sobj, features = "percent.mt", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.mt, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.mt,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.mt",
subtitle = paste0(length(fail_percent.mt), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
To identify a threshold for ribosomal gene expression, we can make a histogram :
aquarius::plot_qc_density(df = sobj@meta.data,
x = "percent.rb",
bins = 200,
group_by = "orig.ident",
group_color = setNames(sample_info$color,
nm = sample_info$sample_identifiant),
x_thresh = cut_percent.rb)
Seurat::VlnPlot(sobj, features = "percent.rb", pt.size = 0.001,
group.by = "cell_type", cols = color_markers) +
ggplot2::scale_fill_manual(values = color_markers, breaks = names(color_markers)) +
ggplot2::geom_hline(yintercept = cut_percent.rb, col = "red") +
ggplot2::labs(x = "")
sobj$fail = ifelse(colnames(sobj) %in% fail_percent.rb,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
Seurat::DimPlot(sobj, group.by = "fail", na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "percent.rb",
subtitle = paste0(length(fail_percent.rb), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
We would like to see if the number of feature expressed by cell, and
the number of UMI is correlated with the cell type, the percentage of
mitochondrial and ribosomal gene expressed, and the doublet status. We
build the log_nCount_RNA by nFeature_RNA
figure, where cells (dots) are colored by these different metrics.
This is the figure, colored by cell type :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "cell_type",
col_colors = unname(color_markers),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of mitochondrial genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.mt",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the percentage of ribosomal genes expressed in cell :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "percent.rb",
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(doublets_consensus.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "doublets_consensus.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(scDblFinder.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "scDblFinder.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
This is the figure, colored by the doublet cells status
(hybrid_score.class) :
aquarius::plot_qc_facslike(df = sobj@meta.data,
x = "nFeature_RNA",
y = "log_nCount_RNA",
col_by = "hybrid_score.class",
col_colors = setNames(nm = c(TRUE, FALSE),
aquarius::gg_color_hue(2)),
x_thresh = cut_nFeature_RNA,
y_thresh = cut_log_nCount_RNA,
bins = 200)
Do filtered cells belong to a particular cell type ?
sobj$all_cells = TRUE
plot_list = list()
## All cells
df = sobj@meta.data
if (nrow(df) == 0) {
plot_list[[1]] = ggplot()
} else {
plot_list[[1]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "All cells",
subtitle = paste(nrow(df), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## Doublets consensus
df = sobj@meta.data %>%
dplyr::filter(doublets_consensus.class)
if (nrow(df) == 0) {
plot_list[[2]] = ggplot()
} else {
plot_list[[2]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = "doublets_consensus.class",
subtitle = paste(sum(sobj$doublets_consensus.class, na.rm = TRUE), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.mt
df = sobj@meta.data %>%
dplyr::filter(percent.mt > cut_percent.mt)
if (nrow(df) == 0) {
plot_list[[3]] = ggplot()
} else {
plot_list[[3]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.mt >", cut_percent.mt),
subtitle = paste(length(fail_percent.mt), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## percent.rb
df = sobj@meta.data %>%
dplyr::filter(percent.rb > cut_percent.rb)
if (nrow(df) == 0) {
plot_list[[4]] = ggplot()
} else {
plot_list[[4]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("percent.rb >", cut_percent.rb),
subtitle = paste(length(fail_percent.rb), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## log_nCount_RNA
df = sobj@meta.data %>%
dplyr::filter(log_nCount_RNA < cut_log_nCount_RNA)
if (nrow(df) == 0) {
plot_list[[5]] = ggplot()
} else {
plot_list[[5]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("log_nCount_RNA <", round(cut_log_nCount_RNA, 2)),
subtitle = paste(length(fail_log_nCount_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
## nFeature_RNA
df = sobj@meta.data %>%
dplyr::filter(nFeature_RNA < cut_nFeature_RNA)
if (nrow(df) == 0) {
plot_list[[6]] = ggplot()
} else {
plot_list[[6]] = aquarius::plot_piechart(df = df,
logical_var = "all_cells",
grouping_var = "cell_type",
colors = color_markers,
display_legend = TRUE) +
ggplot2::labs(title = paste("nFeature_RNA <", round(cut_nFeature_RNA, 2)),
subtitle = paste(length(fail_nFeature_RNA), "cells")) +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5))
}
patchwork::wrap_plots(plot_list, ncol = 3) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We can compare doublet detection methods with a Venn diagram :
ggvenn::ggvenn(list(hybrid = fail_doublets_hybrid,
scDblFinder = fail_doublets_scDblFinder),
fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4) +
ggplot2::ggtitle(label = "Doublet cells") +
ggplot2::theme(plot.title = element_text(hjust = 0.5, face = "bold"))
We visualize cells annotation for doublets :
plot_list = list()
# scDblFinder.class
sobj$fail = ifelse(sobj$scDblFinder.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "scDblFinder.class",
subtitle = paste0(sum(sobj$scDblFinder.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() + Seurat::NoLegend() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
# hybrid_score.class
sobj$fail = ifelse(sobj$hybrid_score.class,
yes = as.character(sobj$cell_type), no = NA)
sobj$fail = factor(sobj$fail, levels = c(levels(sobj$cell_type), NA))
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "fail",
na.value = "gray80", cols = color_markers) +
ggplot2::labs(title = "hybrid_score.class",
subtitle = paste0(sum(sobj$hybrid_score.class, na.rm = TRUE), " cells")) +
Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
sobj$fail = NULL
# Plot
patchwork::wrap_plots(plot_list, nrow = 1)
What is the composition of doublet cells ? We just look at score for each cell type.
sobj$orig.ident.doublets = case_when(is.na(sobj$doublets_consensus.class) ~ "bad quality",
sobj$doublets_consensus.class == TRUE ~ paste0(sobj$orig.ident, " doublets"),
sobj$doublets_consensus.class == FALSE ~ "not doublet")
sobj$orig.ident.doublets = factor(sobj$orig.ident.doublets,
levels = c(paste0(as.character(sample_info$sample_identifiant), " doublets"),
"bad quality", "not doublet"))
doublets_compo = function(score1, score2) {
type1 = unlist(lapply(stringr::str_split(score1, pattern = "score_"), `[[`, 2))
type2 = unlist(lapply(stringr::str_split(score2, pattern = "score_"), `[[`, 2))
if (type1 == type2) {
the_title = "Homotypic doublet"
the_subtitle = type1
score1 = "log_nCount_RNA"
} else {
the_title = "Heterotypic doublet"
the_subtitle = paste(type1, type2, sep = " + ")
}
p = sobj@meta.data %>%
dplyr::arrange(desc(orig.ident.doublets)) %>%
ggplot2::ggplot(., aes(x = eval(parse(text = score1)),
y = eval(parse(text = score2)),
col = orig.ident.doublets)) +
ggplot2::geom_point(size = 0.25) +
ggplot2::scale_color_manual(values = c(sample_info$color, "gray90", "gray60"),
breaks = c(paste0(as.character(sample_info$sample_identifiant), " doublets"),
"bad quality", "not doublet")) +
ggplot2::labs(x = score1, y = score2,
title = the_title, subtitle = the_subtitle) +
ggplot2::theme_classic() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
return(p)
}
score_columns = grep(x = colnames(sobj@meta.data),
pattern = "^score",
value = TRUE)
combinations = expand.grid(score_columns, score_columns) %>%
apply(., 1, sort) %>% t() %>%
as.data.frame()
combinations = combinations[!duplicated(combinations), ]
plot_list = apply(combinations, 1, FUN = function(elem) {
doublets_compo(elem[1], elem[2])
})
sobj$orig.ident.doublets = NULL
patchwork::wrap_plots(plot_list, ncol = 4) +
patchwork::plot_layout(guides = "collect") &
ggplot2::theme(legend.position = "right")
We could save this object before filtering (remove
eval = FALSE) :
saveRDS(sobj, paste0(out_dir, "/datasets/", sample_name, "_sobj_unfiltered.rds"))
We remove :
sobj = subset(sobj, invert = TRUE,
cells = unique(c(fail_log_nCount_RNA, fail_nFeature_RNA,
fail_percent.mt, fail_percent.rb,
fail_doublets_consensus)))
sobj
## An object of class Seurat
## 27955 features across 2588 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We normalize the count matrix for remaining cells :
sobj = Seurat::NormalizeData(sobj,
normalization.method = "LogNormalize",
assay = "RNA")
sobj = Seurat::FindVariableFeatures(sobj,
assay = "RNA",
nfeatures = 3000)
sobj
## An object of class Seurat
## 27955 features across 2588 samples within 1 assay
## Active assay: RNA (27955 features, 3000 variable features)
## 2 dimensional reductions calculated: RNA_pca, RNA_pca_20_tsne
We perform a PCA :
sobj = aquarius::dimensions_reduction(sobj = sobj,
assay = "RNA",
reduction = "pca",
max_dims = 100,
verbose = FALSE)
Seurat::ElbowPlot(sobj, ndims = 100, reduction = "RNA_pca")
We generate a tSNE and a UMAP with 20 principal components :
ndims = 20
sobj = Seurat::RunTSNE(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_tsne"))
sobj = Seurat::RunUMAP(sobj,
reduction = "RNA_pca",
dims = 1:ndims,
seed.use = 1337L,
reduction.name = paste0("RNA_pca_", ndims, "_umap"))
We annotate cells for cell type, with the new normalized expression matrix :
score_columns = grep(x = colnames(sobj@meta.data), pattern = "^score", value = TRUE)
sobj@meta.data[, score_columns] = NULL
sobj$cell_type = NULL
sobj = aquarius::cell_annot_custom(sobj,
newname = "cell_type",
markers = cell_markers,
use_negative = TRUE,
add_score = TRUE,
verbose = TRUE)
sobj$cell_type = factor(sobj$cell_type, levels = names(cell_markers))
colnames(sobj@meta.data) = stringr::str_replace_all(string = colnames(sobj@meta.data),
pattern = " ",
replacement = "_")
table(sobj$cell_type)
##
## CD4 T cells CD8 T cells Langerhans cells macrophages
## 51 19 28 142
## B cells cuticle cortex medulla
## 2 340 86 45
## IRS proliferative IBL ORS
## 79 263 493 208
## IFE HFSC melanocytes sebocytes
## 300 320 165 47
(Time to run : 28.91 s)
To justify cell type annotation, we can make a dotplot :
markers = c("PTPRC", unique(unlist(dotplot_markers[levels(sobj$cell_type)])))
markers = markers[markers %in% rownames(sobj)]
aquarius::plot_dotplot(sobj, assay = "RNA",
column_name = "cell_type",
markers = markers,
nb_hline = 0) +
ggplot2::scale_color_gradientn(colors = aquarius:::color_gene) +
ggplot2::theme(legend.position = "right",
legend.box = "vertical",
legend.direction = "vertical",
axis.title = element_blank(),
axis.text = element_text(size = 15))
We can make a barplot to see the composition of each dataset, and visualize cell types on the projection.
df_proportion = as.data.frame(prop.table(table(sobj$orig.ident,
sobj$cell_type)))
colnames(df_proportion) = c("orig.ident", "cell_type", "freq")
quantif = table(sobj$orig.ident) %>%
as.data.frame.table() %>%
`colnames<-`(c("orig.ident", "nb_cells"))
# Plot
plot_list = list()
plot_list[[2]] = aquarius::plot_barplot(df = df_proportion,
x = "orig.ident",
y = "freq",
fill = "cell_type",
position = ggplot2::position_fill()) +
ggplot2::scale_fill_manual(name = "Cell type",
values = color_markers[levels(df_proportion$cell_type)],
breaks = levels(df_proportion$cell_type)) +
ggplot2::geom_label(data = quantif, inherit.aes = FALSE,
aes(x = orig.ident, y = 1.05, label = nb_cells),
label.size = 0)
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = "RNA_pca_20_tsne") +
ggplot2::scale_color_manual(values = unlist(color_markers),
breaks = names(color_markers)) +
ggplot2::labs(title = sample_name,
subtitle = paste0(ncol(sobj), " cells")) +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1, widths = c(6, 1))
We annotate cells for cell cycle phase :
cc_columns = aquarius::add_cell_cycle(sobj = sobj,
assay = "RNA",
species_rdx = "hs",
BPPARAM = cl)@meta.data[, c("Seurat.Phase", "Phase")]
##
## G1 G2M S
## 1631 171 786
sobj$Seurat.Phase = cc_columns$Seurat.Phase
sobj$cyclone.Phase = cc_columns$Phase
table(sobj$Seurat.Phase, sobj$cyclone.Phase)
##
## G1 G2M S
## G1 1287 47 581
## G2M 139 118 39
## S 205 6 166
(Time to run : 280.26 s)
We visualize cell cycle on the projection :
plot_list = list()
plot_list[[2]] = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "Seurat.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
plot_list[[1]] = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = "RNA_pca_20_tsne") +
ggplot2::labs(title = "Cell Cycle Phase",
subtitle = "cyclone.Phase") +
Seurat::NoLegend() + Seurat::NoAxes() +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5))
patchwork::wrap_plots(plot_list, nrow = 1)
We make a highly resolutive clustering :
sobj = Seurat::FindNeighbors(sobj, reduction = "RNA_pca", dims = c(1:ndims))
sobj = Seurat::FindClusters(sobj, resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2588
## Number of edges: 75084
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8234
## Number of communities: 27
## Elapsed time: 0 seconds
table(sobj$seurat_clusters)
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 247 221 207 201 145 136 126 124 116 113 111 108 96 74 69 68 63 62 43 43
## 20 21 22 23 24 25 26
## 42 42 36 35 24 20 16
We can visualize the cell type :
tsne = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_tsne"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cell_type",
reduction = paste0("RNA_pca_", ndims, "_umap"), cols = color_markers) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from Seurat :
tsne = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "Seurat.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We can visualize the cell cycle, from cyclone :
tsne = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "cyclone.Phase",
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize the clustering :
tsne = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
Seurat::NoAxes() + ggplot2::ggtitle("tSNE") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5),
legend.position = "none")
umap = Seurat::DimPlot(sobj, group.by = "seurat_clusters", label = TRUE,
reduction = paste0("RNA_pca_", ndims, "_umap")) +
Seurat::NoAxes() + ggplot2::ggtitle("UMAP") +
ggplot2::theme(aspect.ratio = 1,
plot.title = element_text(hjust = 0.5))
tsne | umap
We visualize all cell types markers on the tSNE :
markers = dotplot_markers %>% unlist() %>% unname()
markers = markers[markers %in% rownames(sobj)]
plot_list = lapply(markers,
FUN = function(one_gene) {
p = Seurat::FeaturePlot(sobj, features = one_gene,
reduction = paste0("RNA_pca_", ndims, "_tsne")) +
ggplot2::labs(title = one_gene) +
ggplot2::scale_color_gradientn(colors = aquarius::color_gene) +
ggplot2::theme(aspect.ratio = 1,
plot.subtitle = element_text(hjust = 0.5)) +
Seurat::NoAxes()
return(p)
})
patchwork::wrap_plots(plot_list, ncol = 4)
We save the annotated and filtered Seurat object :
saveRDS(sobj, file = paste0(out_dir, "/datasets/", sample_name, "_sobj_filtered.rds"))
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.6 LTS
##
## Matrix products: default
## BLAS: /usr/local/lib/R/lib/libRblas.so
## LAPACK: /usr/local/lib/R/lib/libRlapack.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_3.3.5 patchwork_1.1.2 dplyr_1.0.7
##
## loaded via a namespace (and not attached):
## [1] softImpute_1.4 graphlayouts_0.7.0
## [3] pbapply_1.4-2 lattice_0.20-41
## [5] haven_2.3.1 vctrs_0.3.8
## [7] usethis_2.0.1 dynwrap_1.2.1
## [9] blob_1.2.1 survival_3.2-13
## [11] prodlim_2019.11.13 dynutils_1.0.5
## [13] later_1.3.0 DBI_1.1.1
## [15] R.utils_2.11.0 SingleCellExperiment_1.8.0
## [17] rappdirs_0.3.3 uwot_0.1.8
## [19] dqrng_0.2.1 jpeg_0.1-8.1
## [21] zlibbioc_1.32.0 pspline_1.0-18
## [23] pcaMethods_1.78.0 mvtnorm_1.1-1
## [25] htmlwidgets_1.5.4 GlobalOptions_0.1.2
## [27] future_1.22.1 UpSetR_1.4.0
## [29] laeken_0.5.2 leiden_0.3.3
## [31] clustree_0.4.3 parallel_3.6.3
## [33] scater_1.14.6 irlba_2.3.3
## [35] DEoptimR_1.0-9 tidygraph_1.1.2
## [37] Rcpp_1.0.9 readr_2.0.2
## [39] KernSmooth_2.23-17 carrier_0.1.0
## [41] promises_1.1.0 gdata_2.18.0
## [43] DelayedArray_0.12.3 limma_3.42.2
## [45] graph_1.64.0 RcppParallel_5.1.4
## [47] Hmisc_4.4-0 fs_1.5.2
## [49] RSpectra_0.16-0 fastmatch_1.1-0
## [51] ranger_0.12.1 digest_0.6.25
## [53] png_0.1-7 sctransform_0.2.1
## [55] cowplot_1.0.0 DOSE_3.12.0
## [57] ggvenn_0.1.9 here_1.0.1
## [59] TInGa_0.0.0.9000 ggraph_2.0.3
## [61] pkgconfig_2.0.3 GO.db_3.10.0
## [63] DelayedMatrixStats_1.8.0 gower_0.2.1
## [65] ggbeeswarm_0.6.0 iterators_1.0.12
## [67] DropletUtils_1.6.1 reticulate_1.26
## [69] clusterProfiler_3.14.3 SummarizedExperiment_1.16.1
## [71] circlize_0.4.15 beeswarm_0.4.0
## [73] GetoptLong_1.0.5 xfun_0.35
## [75] bslib_0.3.1 zoo_1.8-10
## [77] tidyselect_1.1.0 reshape2_1.4.4
## [79] purrr_0.3.4 ica_1.0-2
## [81] pcaPP_1.9-73 viridisLite_0.3.0
## [83] rtracklayer_1.46.0 rlang_1.0.2
## [85] hexbin_1.28.1 jquerylib_0.1.4
## [87] dyneval_0.9.9 glue_1.4.2
## [89] RColorBrewer_1.1-2 matrixStats_0.56.0
## [91] stringr_1.4.0 lava_1.6.7
## [93] europepmc_0.3 DESeq2_1.26.0
## [95] recipes_0.1.17 labeling_0.3
## [97] httpuv_1.5.2 class_7.3-17
## [99] BiocNeighbors_1.4.2 DO.db_2.9
## [101] annotate_1.64.0 jsonlite_1.7.2
## [103] XVector_0.26.0 bit_4.0.4
## [105] mime_0.9 aquarius_0.1.5
## [107] Rsamtools_2.2.3 gridExtra_2.3
## [109] gplots_3.0.3 stringi_1.4.6
## [111] processx_3.5.2 gsl_2.1-6
## [113] bitops_1.0-6 cli_3.0.1
## [115] batchelor_1.2.4 RSQLite_2.2.0
## [117] randomForest_4.6-14 tidyr_1.1.4
## [119] data.table_1.14.2 rstudioapi_0.13
## [121] org.Mm.eg.db_3.10.0 GenomicAlignments_1.22.1
## [123] nlme_3.1-147 qvalue_2.18.0
## [125] scran_1.14.6 locfit_1.5-9.4
## [127] scDblFinder_1.1.8 listenv_0.8.0
## [129] ggthemes_4.2.4 gridGraphics_0.5-0
## [131] R.oo_1.24.0 dbplyr_1.4.4
## [133] BiocGenerics_0.32.0 TTR_0.24.2
## [135] readxl_1.3.1 lifecycle_1.0.1
## [137] timeDate_3043.102 ggpattern_0.3.1
## [139] munsell_0.5.0 cellranger_1.1.0
## [141] R.methodsS3_1.8.1 proxyC_0.1.5
## [143] visNetwork_2.0.9 caTools_1.18.0
## [145] codetools_0.2-16 Biobase_2.46.0
## [147] GenomeInfoDb_1.22.1 vipor_0.4.5
## [149] lmtest_0.9-38 msigdbr_7.5.1
## [151] htmlTable_1.13.3 triebeard_0.3.0
## [153] lsei_1.2-0 xtable_1.8-4
## [155] ROCR_1.0-7 BiocManager_1.30.10
## [157] scatterplot3d_0.3-41 abind_1.4-5
## [159] farver_2.0.3 parallelly_1.28.1
## [161] RANN_2.6.1 askpass_1.1
## [163] GenomicRanges_1.38.0 RcppAnnoy_0.0.16
## [165] tibble_3.1.5 ggdendro_0.1-20
## [167] cluster_2.1.0 future.apply_1.5.0
## [169] Seurat_3.1.5 dendextend_1.15.1
## [171] Matrix_1.3-2 ellipsis_0.3.2
## [173] prettyunits_1.1.1 lubridate_1.7.9
## [175] ggridges_0.5.2 igraph_1.2.5
## [177] RcppEigen_0.3.3.7.0 fgsea_1.12.0
## [179] remotes_2.4.2 scBFA_1.0.0
## [181] destiny_3.0.1 VIM_6.1.1
## [183] testthat_3.1.0 htmltools_0.5.2
## [185] BiocFileCache_1.10.2 yaml_2.2.1
## [187] utf8_1.1.4 plotly_4.9.2.1
## [189] XML_3.99-0.3 ModelMetrics_1.2.2.2
## [191] e1071_1.7-3 foreign_0.8-76
## [193] withr_2.5.0 fitdistrplus_1.0-14
## [195] BiocParallel_1.20.1 xgboost_1.4.1.1
## [197] bit64_4.0.5 foreach_1.5.0
## [199] robustbase_0.93-9 Biostrings_2.54.0
## [201] GOSemSim_2.13.1 rsvd_1.0.3
## [203] memoise_2.0.0 evaluate_0.18
## [205] forcats_0.5.0 rio_0.5.16
## [207] geneplotter_1.64.0 tzdb_0.1.2
## [209] caret_6.0-86 ps_1.6.0
## [211] DiagrammeR_1.0.6.1 curl_4.3
## [213] fdrtool_1.2.15 fansi_0.4.1
## [215] highr_0.8 urltools_1.7.3
## [217] xts_0.12.1 GSEABase_1.48.0
## [219] acepack_1.4.1 edgeR_3.28.1
## [221] checkmate_2.0.0 scds_1.2.0
## [223] cachem_1.0.6 npsurv_0.4-0
## [225] babelgene_22.3 rjson_0.2.20
## [227] openxlsx_4.1.5 ggrepel_0.9.1
## [229] clue_0.3-60 rprojroot_2.0.2
## [231] stabledist_0.7-1 tools_3.6.3
## [233] sass_0.4.0 nichenetr_1.1.1
## [235] magrittr_2.0.1 RCurl_1.98-1.2
## [237] proxy_0.4-24 car_3.0-11
## [239] ape_5.3 ggplotify_0.0.5
## [241] xml2_1.3.2 httr_1.4.2
## [243] assertthat_0.2.1 rmarkdown_2.18
## [245] boot_1.3-25 globals_0.14.0
## [247] R6_2.4.1 Rhdf5lib_1.8.0
## [249] nnet_7.3-14 RcppHNSW_0.2.0
## [251] progress_1.2.2 genefilter_1.68.0
## [253] statmod_1.4.34 gtools_3.8.2
## [255] shape_1.4.6 HDF5Array_1.14.4
## [257] BiocSingular_1.2.2 rhdf5_2.30.1
## [259] splines_3.6.3 AUCell_1.8.0
## [261] carData_3.0-4 colorspace_1.4-1
## [263] generics_0.1.0 stats4_3.6.3
## [265] base64enc_0.1-3 dynfeature_1.0.0
## [267] smoother_1.1 gridtext_0.1.1
## [269] pillar_1.6.3 tweenr_1.0.1
## [271] sp_1.4-1 ggplot.multistats_1.0.0
## [273] rvcheck_0.1.8 GenomeInfoDbData_1.2.2
## [275] plyr_1.8.6 gtable_0.3.0
## [277] zip_2.2.0 knitr_1.41
## [279] ComplexHeatmap_2.14.0 latticeExtra_0.6-29
## [281] biomaRt_2.42.1 IRanges_2.20.2
## [283] fastmap_1.1.0 ADGofTest_0.3
## [285] copula_1.0-0 doParallel_1.0.15
## [287] AnnotationDbi_1.48.0 vcd_1.4-8
## [289] babelwhale_1.0.1 openssl_1.4.1
## [291] scales_1.1.1 backports_1.2.1
## [293] S4Vectors_0.24.4 ipred_0.9-12
## [295] enrichplot_1.6.1 hms_1.1.1
## [297] ggforce_0.3.1 Rtsne_0.15
## [299] shiny_1.7.1 numDeriv_2016.8-1.1
## [301] polyclip_1.10-0 grid_3.6.3
## [303] lazyeval_0.2.2 Formula_1.2-3
## [305] tsne_0.1-3 crayon_1.3.4
## [307] MASS_7.3-54 pROC_1.16.2
## [309] viridis_0.5.1 dynparam_1.0.0
## [311] rpart_4.1-15 zinbwave_1.8.0
## [313] compiler_3.6.3 ggtext_0.1.0